/* * Copyright (C) 2013 lichtflut Forschungs- und Entwicklungsgesellschaft mbH * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.arastreju.sge.index; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.util.Version; import org.arastreju.sge.ConversationContext; import org.arastreju.sge.inferencing.Inferencer; import org.arastreju.sge.model.Statement; import org.arastreju.sge.model.nodes.ResourceNode; import org.arastreju.sge.naming.QualifiedName; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; /** * <p> * Indexer implementation using Lucene * </p> * * <p> * Created Feb 01, 2013 * </p> * * @author Timo Buhrmester */ public class ArastrejuIndex implements IndexUpdator, IndexSearcher { private static final Logger LOGGER = LoggerFactory.getLogger(ArastrejuIndex.class); private final List<Inferencer> inferencers = new ArrayList<Inferencer>(); private final ConversationContext conversationContext; private final IndexProvider provider; // ---------------------------------------------------- public ArastrejuIndex(ConversationContext cc, IndexProvider provider) { this.conversationContext = cc; this.provider = provider; } // ---------------------------------------------------- /** * Add a soft inferencer. * @param inferencer The inferencer. * @return This. */ public ArastrejuIndex add(Inferencer... inferencer) { Collections.addAll(inferencers, inferencer); return this; } // ---------------------------------------------------- /** * Index this node with all it's statements, regarding the current primary context. * If the node already has been indexed, it will be updated. * @param node The node to index. */ @Override public void index(ResourceNode node) { LOGGER.debug("Indexing ({})", node); Document doc = createDocument(node); ContextIndex index = provider.forContext(conversationContext.getPrimaryContext()); try { index.getWriter().updateDocument(new Term(IndexFields.QUALIFIED_NAME, normalizeQN(node.toURI())), doc); //creates if nonexistent // index.getWriter().commit(); // XXX to be revised when transactions enter the play } catch (IOException e) { String msg = "caught IOException while indexing resource " + node.toURI(); LOGGER.error(msg, e); throw new IllegalStateException(msg, e); } } /** * Remove the resource identified by the qualified name form the index. * @param qn The qualified name. */ @Override public void remove(QualifiedName qn) { LOGGER.debug("remove({})", qn); ContextIndex index = provider.forContext(conversationContext.getPrimaryContext()); try { index.getWriter().deleteDocuments(new Term(IndexFields.QUALIFIED_NAME, normalizeQN(qn.toURI()))); // index.getWriter().commit(); } catch (IOException e) { LOGGER.error("Could not remove node '{}' from index due to {}", qn, e.getMessage()); throw new IllegalStateException("Could not remove node.", e); } } @Override public IndexSearchResult search(String query) { LOGGER.debug("search({})", query); ContextIndex index = provider.forContext(conversationContext.getPrimaryContext()); org.apache.lucene.search.IndexSearcher searcher = index.getSearcher(); /* default field is 'qn' as this is the only field common to all resources. * (not that we're going to need a default field, anyway.) */ QueryParser qp = new QueryParser(Version.LUCENE_35, IndexFields.QUALIFIED_NAME, new LowercaseWhitespaceAnalyzer(Version.LUCENE_35)); qp.setAllowLeadingWildcard(true); //such queries should be avoided where possible nevertheless List<QualifiedName> resultList; try { /* we can use searcher.search(String, Collector) if we need all them results */ AllHitsCollector collector = new AllHitsCollector(); searcher.search(qp.parse(query), collector); resultList = collector.getList(); } catch (IOException e) { LOGGER.error("Caught IOException while processing query '" + query + "'", e); throw new IllegalStateException("Could not remove node.", e); } catch (ParseException e) { LOGGER.error("Caught ParseException while processing query '" + query + "'", e); throw new IllegalStateException("Could not perform search.", e); } return new FixedIndexSearchResult(resultList); } // ---------------------------------------------------- public void dump() { ContextIndex index = provider.forContext(conversationContext.getPrimaryContext()); org.apache.lucene.search.IndexSearcher searcher = index.getSearcher(); IndexReader reader = searcher.getIndexReader(); try { TopDocs top = searcher.search(new MatchAllDocsQuery(), 100); for (int i = 0; i < top.totalHits; i++) { Document doc = reader.document(top.scoreDocs[i].doc); LOGGER.info("---Document--- id: " + top.scoreDocs[i].doc); List<Fieldable> fields = doc.getFields(); for (Fieldable f : fields) { LOGGER.info("\tField: name='" + f.name() + "', val='" + f.stringValue() + "'"); } } } catch (IOException e) { String msg = "caught IOException while dumping index"; LOGGER.error(msg, e); throw new RuntimeException(msg, e); } } /* no more calls to this object after close() */ public void close() { ContextIndex index = provider.forContext(conversationContext.getPrimaryContext()); provider.release(conversationContext.getPrimaryContext()); try { index.getReader().close(); index.getWriter().close(); } catch (IOException e) { String msg = "caught IOException while closing reader/writer"; LOGGER.error(msg, e); throw new RuntimeException(msg, e); } } public void clear() { ContextIndex index = provider.forContext(conversationContext.getPrimaryContext()); try { index.getWriter().deleteAll(); // index.getWriter().commit(); } catch (IOException e) { String msg = "caught IOException while clearing index"; LOGGER.error(msg, e); throw new RuntimeException(msg, e); } } // ---------------------------------------------------- private Document createDocument(ResourceNode node) { Document doc = new Document(); doc.add(new Field(IndexFields.QUALIFIED_NAME, node.toURI(), Store.YES, Index.ANALYZED)); Set<Statement> asserted = node.getAssociations(); Set<Statement> inferred = new HashSet<Statement>(); for (Statement stmt : asserted) { for (Inferencer inferencer : inferencers) { inferencer.addInferenced(stmt, inferred); } addFields(doc, stmt); } for (Statement stmt : inferred) { addFields(doc, stmt); } return doc; } private void addFields(Document doc, Statement stmt) { doc.add(makeField(stmt)); Field f = makeGenField(stmt); if (!findValue(doc, f.name(), f.stringValue())) { doc.add(f); } } private Field makeGenField(Statement stmt) { Field f; if (stmt.getObject().isResourceNode()) { f = new Field(IndexFields.RESOURCE_RELATION, stmt.getObject().asResource().toURI(), Store.YES, Index.ANALYZED); } else { f = new Field(IndexFields.RESOURCE_VALUE, stmt.getObject().asValue().getStringValue(), Store.YES, Index.ANALYZED); //analyzed, right? } return f; } private Field makeField(Statement stmt) { Field f; if (stmt.getObject().isResourceNode()) { f = new Field(stmt.getPredicate().toURI(), stmt.getObject().asResource().toURI(), Store.YES, Index.ANALYZED); } else { /* This replicates the behaviour of the old neo index, for now. * TODO: Should probably use different sorts of fields (like * NumericField) where applicable to leverage more of lucenes functionality */ f = new Field(stmt.getPredicate().toURI(), stmt.getObject().asValue().getStringValue(), Store.YES, Index.ANALYZED); //analyzed, right? } return f; } private boolean findValue(Document doc, String fieldName, String val) { String[] vals = doc.getValues(fieldName); for (String v : vals) { if (v.equals(val)) { return true; } } return false; } /* this is applied whenever we search for a qn. * XXX do we actually want case-insensitive search on URI? * LuceneQueryBuilder.normalizeValue() sort of enforces/suggests this. */ private String normalizeQN(String qn) { return qn.toLowerCase(); } }